
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;

import java.util.*;
/**
 * Crawls finance news from the Eastmoney listing pages using two Chrome
 * sessions driven by Selenium: one walks the listing pages, the other opens
 * each linked article and prints its title, link and paragraph text to
 * standard output.
 */
public class  dongfang{
    /** System property set before the Chrome drivers are created. */
    private static final String DRIVER_PROPERTY = "webdriver.chrome.driver";

    /** Fallback chromedriver location, used only when the property is not already set. */
    private static final String DEFAULT_DRIVER_PATH = "E:\\Java\\工具包\\Chromedriver\\chromedriver.exe";

    /** Maximum number of listing pages visited in one crawl. */
    private static final int MAX_PAGES = 25;

    /**
     * Entry point: crawls the default Eastmoney listing URL.
     *
     * @param args ignored
     * @throws InterruptedException if the crawl is interrupted while sleeping
     */
    public static void main(String[] args) throws InterruptedException {
        dongfang d = new dongfang();
        String url = "http://finance.eastmoney.com/a/cgnjj.html";
        d.getUrl(url);
    }

    /**
     * Walks up to {@value #MAX_PAGES} listing pages starting at {@code url} and
     * hands every article link found on each page to {@link Analysis}.
     * Both browser sessions are always closed, even when the crawl fails.
     *
     * @param url address of the first listing page
     * @throws InterruptedException if the thread is interrupted during one of the pauses
     */
    public  void getUrl(String url) throws InterruptedException {
        Analysis xian = new Analysis();
        // Respect a driver path supplied via -Dwebdriver.chrome.driver=...;
        // only fall back to the hard-coded location when none was given.
        if (System.getProperty(DRIVER_PROPERTY) == null) {
            System.setProperty(DRIVER_PROPERTY, DEFAULT_DRIVER_PATH);
        }

        WebDriver driver = null;   // browses the listing pages
        WebDriver driver2 = null;  // opens the individual articles
        try {
            driver = new ChromeDriver();
            driver2 = new ChromeDriver();
            driver.get(url);
            Thread.sleep(3000);

            for (int page = 1; page <= MAX_PAGES; page++) {
                // Parse the current listing page. The articles are opened in
                // driver2, so these elements stay valid while we iterate.
                List<WebElement> links = driver.findElements(By.cssSelector(" div.repeatList li a"));
                for (WebElement element : links) {
                    String text = element.getText();
                    String href = element.getAttribute("href");
                    if (href == null || href.isEmpty()) {
                        continue; // anchor without a target: nothing to open
                    }
                    Thread.sleep(2000);
                    xian.run(driver2, text, href);
                }

                if (page == MAX_PAGES) {
                    break; // last page processed: no need to load another one
                }

                // Move on to the next page. findElements returns an empty list
                // (never null) when nothing matches, so guard against that
                // instead of indexing blindly.
                List<WebElement> pageButtons = driver.findElements(By.className("page-btn"));
                if (pageButtons.isEmpty()) {
                    break;
                }
                // NOTE(review): presumably the second button is "next" when both
                // "previous" and "next" are shown, and the only button is "next"
                // on the first page - confirm against the live page markup.
                WebElement nextButton = pageButtons.size() > 1 ? pageButtons.get(1) : pageButtons.get(0);
                String nextUrl = nextButton.getAttribute("href");
                if (nextUrl == null || nextUrl.isEmpty()) {
                    break;
                }
                driver.get(nextUrl);
            }
        } finally {
            // Nested finally so that a failure closing one session cannot
            // prevent the other one from being closed.
            try {
                if (driver2 != null) {
                    driver2.quit();
                }
            } finally {
                if (driver != null) {
                    driver.quit();
                }
            }
        }
    }

    /**
     * Extracts and prints the wanted content of a single article.
     * The class still extends {@link Thread} for compatibility, but it is never
     * started by this file: {@link #run(WebDriver, String, String)} is an
     * overload invoked synchronously on the caller's thread.
     */
    class Analysis extends Thread{
        /**
         * Loads {@code href} in {@code driver2}, collects the text of every
         * {@code <p>} element and prints title, link and body to standard output.
         *
         * @param driver2 browser session used to open the article
         * @param title   link text taken from the listing page
         * @param href    article address
         * @throws InterruptedException if interrupted during one of the pauses
         */
        public void run( WebDriver driver2, String title, String href) throws InterruptedException {
            driver2.get(href);
            Thread.sleep(2000);

            // Start from an empty buffer so the body no longer begins with the
            // literal text "null"; each paragraph is still preceded by a newline.
            StringBuilder body = new StringBuilder();
            for (WebElement paragraph : driver2.findElements(By.tagName("p"))) {
                body.append('\n').append(paragraph.getText());
            }

            String goal = "标题："+title+"\n"+"href:"+href+"\n"+"正文："+body;
            System.out.println(goal);
            Thread.sleep(2000);
        }
    }}